% $Id: simequat.tex 204 2009-04-20 01:59:36Z jsibert $
%
% Author: David Fournier
% Copyright (c) 2008 Regents of the University of California
%
%\documentclass{article}
%\usepackage{hyperref}
%\makeindex
%\begin{document}
%\input docmacpdf.tex
%\def\pageno{\thepage}
%\def\aone{\hbox{$a$\kern -1pt $1$}}
%\def\Pone{\hbox{$P$\kern -1pt $1$}}
%\chapno=5
\def\aone{a_{t|t-1}}
\def\Pone{P_{t|t-1}}
\mysection {Simultaneous Equations Models}
For each $t$, $1\le t\le T$ let $y_t$ be an $n$ dimensional vector
and $x_t$ be an n dimensional vector. Let $B$ and~$\Gamma$ 
be~$(n \times n)$  and~$(n \times m)$ matrices and suppose that the relationship
$$By_t + \Gamma x_t=u_t$$
holds where the $u_t$ are $n$~dimensional random vectors of disturbances.
The~$y_t$ are  the endogenous variables in the system. The~$x_t$ are
predetermined  variables in the sense that they are independent of~$u_t$.
Note that for autoregressive models the~$x_t$ may contain values 
of~$y_j$ for~$j<i$. In general not all of the coefficients of~$B$ 
and~$\Gamma$ are estimable. Interesting cases have special structure which
are determined by the particular parameterization of
of~$B$,~~$\Gamma$ and~$D$. In particular it is generally assumed 
that~$B_{ii}=1$ for~$1\le i\le n$ and that~$B^{-1}$ exists.


\mysection{Full Information Maximum Likelihood (FIML)} 
Assume that for each~$t$,~$u_t$ has a multivariate normal distribution
with mean~$0$ and covariance matrix~$D$.
The log-likelihood function for~$B,\Gamma$, and~$D$ is given
by
\begin{equation}
\myeq{
L(B,\Gamma,D)=T/2\log(|B|^2)-T/2\log(|D|)-1/2\sum_{t=1}^T
  [By_t + \Gamma x_t]^\prime D^{-1} [By_t + \Gamma x_t] \label{se:1}
}
\end{equation}
\mysection{Concentrating out $D$ for the FIML}
If there are no constraints on $D$ the value of $D$ which maximizes
\ref{se:1} can be solved for in terms of the other parameters and
observations.This value $\hat D$ is given by
\begin{equation}
\myeq{
\hat D =
1/T\sum_{t=1}^T [By_t + \Gamma x_t]^\prime [By_t + \Gamma x_t] \label{se:2}
}
\end{equation}
substituting this value into $(\number\mychapno.1)$ it can be shown that
$$1/2\sum_{t=1}^T 
  [By_t + \Gamma x_t]^\prime \hat D^{-1} [By_t + \Gamma x_t] $$
is a constant which can be ignored for the maximization so that
equation \ref{se:2}
\begin{equation}
\myeq{
\tilde L(B,\Gamma)=T/2\log(|B|^2)-T/2\log(|\hat D|)\label{se:3} 
}
\end{equation}
and the FIML estimates for~$B$ and~$\Gamma$ can be found by maximizing
$\tilde L(B,\Gamma)$.

When there are constraints on the parameters of~$D$ then~$\tilde D$ is
no longer the maximum likelihood estimate for~$D$ so
 it is necessary to 
maximize \ref{se:1} which is in general a numerically unstable problem. 
To successfully carry out the optimization it is necessary to obtain
reasonable initial estimates for the parameters of~$B$ and~$\Gamma$
and to use a good method for parametrerizing~$D$.
Initial estimates for~$B$ and~$\Gamma$ can be obtained from ordinary
least squares (OLS), that is find the values 
of~$B$ and~$\Gamma$ which minimize
$$\sum_{t=1}^T \|y_t - B^{-1}\Gamma x_t\|^2$$

To parameterize~$D$ note that $\hat D$ is an estimate of~$D$
so that we can parameterize~$D$ by
$$ D = A \hat D A^\prime$$
where~$A$ is a lower triangular matrix. If~$U$ is the choleski
decomposition of~$D$ and~$\hat U$ is the choleski decomposition
of~$\hat D$ then $A=\hat U^{-1}U$.
It follows that~$A$ should be close to the identity matrix which
is a good initial estimate for~$A$. 


\mysection{Evaluating the model's performance} 

To evaluate the model's performance simulated data were generated.
The form of the model is
%$$\eqalign{
 \begin{eqnarray}
   y_{t1}+y_{t4}+y_{t5}-2+0.45y_{t-1,1}&=u_{t1}\cr
   0.1y_{t1}+y_{t2}+2.0y_{t5}-1-0.6y_{t-1,1}+0.25y_{t-1,2}&=u_{t2}\cr
   0.3y_{t1}-0.2y_{t2}+y_{t3}+1&=u_{t3}\cr
   1.4y_{t2}-3.1y_{t3}+y_{t4}+1&=u_{t4}\cr
   y_{t3}+y_{t4}+y_{t5}&=u_{t5}\cr
 \end{eqnarray}
%}
%$$
with a covariance matrix
$$D=\left(\begin{matrix}
0.512&0.32&0.256&-1.28&0\cr
0.32&0.328&-0.16&-0.8&0\cr
0.256&-0.16&1.728&0.16&0.8\cr
-1.28&-0.8&0.16&4.8&0.8\cr
0&0&0.8&0.8&0.928
\end{matrix}
       \right)
$$

$$B=\left(\begin{matrix} 
    1&0&0&1&1\cr
    0.1&1&0&0&2\cr
      0.3&-0.2&1&0&0\cr
      0&1.4&-3.1&1&0\cr
      0&0&1&1&1
  \end{matrix}
  \right)
$$
$$\Gamma=\left(\begin{matrix}-2&0.45&0\cr
         -1&-0.6&0.25\cr
          1&0&0\cr
          1&0&0\cr
          0&0&0
   \end{matrix}\right)
$$
For this model $n=5$, $m=3$, and $x_t=(1,y_{t-1,1},y_{t-1,2})$.

The eigenvalues of~$D$ are $(0.00661 0.13583 0.4962 2.21162 5.44573)$
Having a small eigenvalue tends to produce simulated data that are
difficult to analyze.

Forty time periods of data were generated by the simulator.
The simulated~$y$ values are:
\beginexample
 1.63252 3.00223 1.70246 1.34813 -1.12202
 -2.87857 -7.72402 -3.88482 -1.24196 4.71024
 1.12975 -7.92719 -3.85188 -2.33007 4.2984
 1.48112 -2.25692 -1.15585 -2.26166 3.01061
 -2.91887 -5.65015 -2.74198 -0.695815 4.51346
 2.29715 0.524946 0.0268777 1.07624 -0.0898846
 1.32854 6.17993 2.51613 1.67248 -2.39914
 0.5661 -2.53219 -0.966376 0.00820516 1.76543
 0.353591 -3.81146 -2.04431 -1.48574 2.67208
 -2.22887 -2.33436 -1.66284 0.646399 2.26448
 2.29896 -6.42238 -2.41106 -1.70633 3.50028
 0.145878 1.85161 0.646578 -0.380955 0.709761
 -0.779376 -7.60611 -3.79636 -1.63017 4.02658
 -0.107371 -5.61361 -2.35816 -1.77719 3.67762
 0.662221 -5.78832 -2.19632 -2.03071 4.37961
 -0.570661 -7.42505 -3.28544 -2.94125 5.19422
 0.0953742 -1.80617 -1.06915 -0.0320784 2.00018
 -0.406986 -4.96143 -2.8084 -0.948902 3.1811
 1.07219 -7.92608 -2.95484 -3.17022 5.12702
 -0.495144 1.33611 -0.357291 -0.0260083 0.360653
 -0.637878 -8.76117 -3.81638 -1.77116 4.82796
 1.59717 -3.18571 -1.72708 -1.93975 2.79462
 -1.13013 -2.20942 -1.30198 -0.603895 2.29486
 -1.0103 -7.90106 -3.65303 -1.07367 4.66283
 -1.02985 -3.00268 -1.63388 0.309992 2.97876
 0.176882 -7.96282 -3.60299 -1.86289 4.86943
 1.16904 -1.07952 -0.0969977 -0.74563 2.38399
 -0.636119 -2.84841 -1.43676 -0.38474 2.51142
 -1.72929 -5.39866 -2.51289 0.0978131 3.786
 3.56302 3.79343 2.05613 1.43836 -1.2029
 0.15806 -0.863882 -0.302119 -1.19212 1.38518
 1.37323 -1.94413 -0.537631 -0.751294 1.42083
 -0.404075 -8.53817 -3.58618 -3.33976 5.69071
 0.362091 -5.78568 -2.46635 -2.33359 4.21899
 -2.26158 -12.7075 -6.07426 -3.62455 8.49292
 1.20438 -5.44629 -2.30249 -2.02905 3.82742
 1.41463 -1.71734 -0.788698 -1.90306 2.2595
 0.897156 1.28039 0.693579 0.318737 0.385857
 -0.0330384 -1.55642 -0.189474 0.312385 1.57168
 1.5747 0.827181 1.26032 0.813312 0.270432
\endexample

For the~$x$ values the first time periods data $x_0 =(1,1,2)$
were supplied.  The simulated~$x$ values are:
\beginexample
 1 1 2
 1 1.63252 3.00223
 1 -2.87857 -7.72402
 1 1.12975 -7.92719
 1 1.48112 -2.25692
 1 -2.91887 -5.65015
 1 2.29715 0.524946
 1 1.32854 6.17993
 1 0.5661 -2.53219
 1 0.353591 -3.81146
 1 -2.22887 -2.33436
 1 2.29896 -6.42238
 1 0.145878 1.85161
 1 -0.779376 -7.60611
 1 -0.107371 -5.61361
 1 0.662221 -5.78832
 1 -0.570661 -7.42505
 1 0.0953742 -1.80617
 1 -0.406986 -4.96143
 1 1.07219 -7.92608
 1 -0.495144 1.33611
 1 -0.637878 -8.76117
 1 1.59717 -3.18571
 1 -1.13013 -2.20942
 1 -1.0103 -7.90106
 1 -1.02985 -3.00268
 1 0.176882 -7.96282
 1 1.16904 -1.07952
 1 -0.636119 -2.84841
 1 -1.72929 -5.39866
 1 3.56302 3.79343
 1 0.15806 -0.863882
 1 1.37323 -1.94413
 1 -0.404075 -8.53817
 1 0.362091 -5.78568
 1 -2.26158 -12.7075
 1 1.20438 -5.44629
 1 1.41463 -1.71734
 1 0.897156 1.28039
 1 -0.0330384 -1.55642
\endexample
\mysection{Results of (FIML) for unconstrained $D$} 
For the estinmation process all the elements of the matrices
$B$ and $\Gamma$ with value $0$ were fixed at theere correct value.
The FIML estimates for unconstrained covariance matrix $D$ are given
below.

$$B=\left(\begin{matrix}
1&0&0&0.364395&0.364395\cr
0.238411&1&0&0&1.37593\cr
0.042875&-0.330484&1&0&0\cr
0&1.93026&-4.90367&1&0\cr
0&0&1.06339&1.09851&1
 \end{matrix}\right)
$$

$$\Gamma=\left(\begin{matrix}
-0.917978&0.431377&0\cr
0.473915&-0.491422&0.19981\cr
0.441089&0&0\cr
-0.126701&0&0\cr
0&0&0
\end{matrix}
\right)
$$
$$D=\left(\begin{matrix}
0.938236&0.843743&0.506127&-1.38383&0.314262\cr
0.843743&1.55844&0.732235&-0.591771&1.37195\cr
0.506127&0.732235&0.430091&-0.805866&0.62244\cr
-1.38383&-0.591771&-0.805866&4.18591&-0.0363513\cr
0.314262&1.37195&0.62244&-0.0363513&1.75939
\end{matrix}
\right)
$$
\mysection{Results of (FIML) for constrained $D$} 
Since $D_{51}=0$, and $D_{52}=0$, these values were not well 
estimated by the unconstrained FIML procedure. Suppose that
we know that their values should be $0$ and that the
value of $D_{55}\le1.0$. We incorporate this knowledge
into the model by using penalty functions. 

$$B=\left(\begin{matrix}
1&0&0&0.594218&0.594218\cr
-0.321482&1&0&0&1.97809\cr
0.0416175&-0.365572&1&0&0\cr
0&2.48468&-6.10763&1&0\cr
0&0&0.968057&1.02738&1
\end{matrix}
\right)
$$
$$\Gamma=\left(\begin{matrix}
-1.3418&0.448342&0\cr
-1.14717&-0.593754&0.183788\cr
0.372114&0&0\cr
-0.0640792&0&0\cr
0&0&0
\end{matrix}\right)
$$
$$D=\left(\begin{matrix}
0.593424&-0.325467&0.298836&-1.43231&0.00401209\cr
-0.325467&0.650371&-0.234747&0.441841&4.48456e-06\cr
0.298836&-0.234747&0.258147&-0.900105&0.255262\cr
-1.43231&0.441841&-0.900105&6.13619&0.180376\cr
0.00401209&4.48456e-06&0.255262&0.180376&1.00262
\end{matrix}
\right)
$$
\mysection{Code for (FIML) for constrained $D$} 

This is the code in the TPL file split up by sections and
commented on. The {\tt DATA\_SECTION} defines the data and some size
aspects of the model structure. Objects which are prefixed by {\tt init\_}
will be read in from the data file.
\beginexample
   
  // This version incorporates constraints via penalty functions.
  //This is sample code to determine the parameters of a
  //sim,ultaneous equations model. The notation follows
  //that of Hamilton, Times Series Analysis, chapter 9.
  //the general form of the model is
  //
  //    By_t + Gamma x_t =u_t
  //
  //for t=1,...,T. The u_t have covariance matrix D.

DATA_SECTION
  init_int T // the number of observations
  init_int dimy // dimension of the vector of 
                // endogenousavariables
  init_int dimx // dimension of the vector of 
                // predetermined variables 
  init_int num_Bpar // the number of parameters in
                // the elements of B to be estimated 
  init_int num_Gpar // the number of parameters in
                // the elements of Gamma to be estimated 
  init_matrix y(1,T,1,dimy) // the y_t
  init_matrix x(1,T,1,dimx) // the x_t
  int dimy1
 !! dimy1=dimy*(dimy+1)/2;  // size of symmetric matrix
\endexample

The {\tt PARAMETER\_SECTION} describes the model's parameters.
Objects which are prefixed by {\tt init\_} are the independent
variables of the model.  For example {\tt Bpar} is used to
parameterize the nonzero elements of {\tt B}.
{\tt ch\_Dpar} is used to parameterize the lower triangular matrix
of the correction from {\tt emp\_D} to the covariance matrix
{\tt D}. The minimization is done in a number of phases.
The parameter {\tt kx} is used to have a parameter which becomes active
in phase 4, so that the minimization will take place in four stages.
This parameter does not enter into the ``real'' part of the model.

\beginexample
PARAMETER_SECTION
  init_vector Bpar(1,num_Bpar)
  init_vector Gpar(1,num_Gpar)
  init_vector ch_Dpar(1,dimy1,2)
  matrix B(1,dimy,1,dimy)
  matrix D(1,dimy,1,dimy)  // the covariance matrix for the 
                           // disturbances u_t
  matrix emp_D(1,dimy,1,dimy)  // the covariance matrix for the 
  matrix Gamma(1,dimy,1,dimx)
  matrix ch_D(1,dimy,1,dimy)
  matrix z(1,T,1,dimy);
  objective_function_value f
  init_number kx(4);
\endexample

The {\tt PROCEDURE\_SECTION} is where the models calculations
are carried out. It is split up into a set of functions wheere the
model specific pieces of code (different code for different models)
are located. Finally  the optimization for parameter estimation is
calculated. Thsi depends on the phase of the optimzation procedure.
A {\tt switch} statment is used to vary the form of the objective
function depending on the phase. The function {\tt current\_phase()}
return the number of the current phase of the optimization.
The function {\tt last\_phase()}
returns 1 (``true'') if the current phase is the last phase of the
optimization. Quadratic penalty functions are put on the model's
parameters and the penalty weights are decreased in subsequent phase.
this proecedure helsp to stabilize the optimzation when several
model parametes are highly correlated.


\beginexample
PROCEDURE_SECTION
  fill_B();    // this will vary from model to model
  fill_Gamma();  // this will vary from model to model

  calculate_empirical_copvariance_matrix();

  fill_D();  // this will vary from model to model

  calculate_constraints();  // this will vary from model to model

  int sgn;
  switch (current_phase())
  {
  case 1:
    {
      f+=0.1*norm2(Bpar);
      f+=0.1*norm2(Gpar);
      f+=0.1*norm2(ch_Dpar);
      dvar_matrix Binv=inv(B);
      for (int t=1;t<=T;t++)
      {
        dvar_vector z=y(t)+Binv*Gamma*x(t);
        f+=z*z;
      }
      break;
    }
  default: 
    {
      f+= -0.5*T*log(square(det(B)))
       +0.5*T*ln_det(D,sgn);
  
      dvar_matrix Dinv=inv(D);
      dvariable f1=0.0;
      for (int t=1;t<=T;t++)
      {
        dvar_vector z=B*y(t)+Gamma*x(t);
        f1+=z*(Dinv*z);
      }
      f+=0.5*f1;
      if (!last_phase())
      {
        f+=0.1*norm2(Bpar);
        f+=0.1*norm2(Gpar);
        f+=0.1*norm2(ch_Dpar);
      }
      else
      {
        f+=0.001*norm2(Bpar);
        f+=0.001*norm2(Gpar);
        f+=0.001*norm2(ch_Dpar);
      }
    }
  }
  
  f+=square(kx);

FUNCTION fill_B
  B.initialize();
  for (int i=1;i<=dimy;i++)
    B(i,i)=1.0;

  // this is part of the special structure of the model
  int ii=1;
  B(2,1)=Bpar(1);
  B(3,1)=Bpar(2);

  B(3,2)=Bpar(3);
  B(4,2)=Bpar(4);

  B(4,3)=Bpar(5);
  B(5,3)=Bpar(6);

  B(5,4)=Bpar(7);
  B(1,4)=Bpar(8);

  B(1,5)=Bpar(8);
  B(2,5)=Bpar(9);


FUNCTION fill_Gamma
  Gamma.initialize();

  // this is the part of special structure of the model
  Gamma(1,1)=Gpar(1);
  Gamma(2,1)=Gpar(2);
  Gamma(3,1)=Gpar(3);
  Gamma(4,1)=Gpar(4);

  Gamma(1,2)=Gpar(5);  
  Gamma(2,2)=Gpar(6);  


  Gamma(2,3)=Gpar(7);  


FUNCTION fill_D

  ch_D.initialize();
  // this is the special structure of the model
  int ii=1;
  for (int i=1;i<=dimy;i++)
  {
    for (int j=1;j<=i;j++)
      ch_D(i,j)=ch_Dpar(ii++);
    ch_D(i,i)+=1;
  }

  D=ch_D*emp_D*trans(ch_D); // so Ch_D is the Cholesky
                      // decomposition of D
FUNCTION calculate_empirical_covariance_matrix

  for (int t=1;t<=T;t++)
    z(t)=B*y(t)+Gamma*x(t);

  emp_D=empirical_covariance(z);
  
FUNCTION  calculate_constraints

  double wt=1.0;
  switch (current_phase())
  {
  case 1:
    wt=1.0;
    break;
  case 2:
    wt=10.0;
    break;
  case 3:
    wt=100.0;
    break;
  default:
    wt=1000.0;
    break;
  }
  if (D(5,5)>1.0)
    f+=wt*square(D(5,5)-1.00);

  f+=wt*square(D(5,1));
  f+=wt*square(D(5,2));
\endexample
The {\tt REPORT\_SECTION} prints out a report of the imodel's results.

\beginexample
REPORT_SECTION
  report << "B" << endl;
  report << B << endl;
  report << "Gamma" << endl;
  report << Gamma << endl;
  report << "D" << endl;
  report << D << endl;
  report << "eigenvalues of D" << endl;
  report << eigenvalues(D) << endl;
  report << "y" << endl;
  report << y << endl;
  report << "x" << endl;
  report << x << endl;
\endexample

